# -*- coding: utf-8 -*-
import copy
import datetime
from pymongo import MongoClient, UpdateOne
from pymongo.errors import DuplicateKeyError
from traceback import format_exc
from zc_core.model.items import *
from zc_core.util.encrypt_util import md5
from zc_core.util.batch_gen import batch_to_year, time_to_batch_no


class BoxPipeline(object):
    """Item pipeline that persists ``Box`` items to MongoDB.

    One database is used per year, named ``<bot_name>_<year>``. The ``tag``
    of a box selects the target collection(s); every record in the box's
    ``data`` list is upserted by its id field.
    """

    # tag -> (id field, per-batch collection template or None,
    #         fixed collection name or None, reset ``offlineTime`` to 0)
    # The per-batch collection (when present) is written before the fixed one.
    _TAG_RULES = {
        'sku': ('skuId', 'sku_{}', 'sku_pool', True),
        'sku_pool': ('skuId', None, 'sku_pool', True),
        'item': ('skuId', 'data_{}', 'item_data_pool', False),
        'item_pool': ('skuId', None, 'item_data_pool', False),
        # item_supplier: marked as pending/TBD in the original code
        'item_supplier': ('skuId', 'item_supplier_{}', 'item_supplier_pool', False),
        'item_supplier_pool': ('skuId', None, 'item_supplier_pool', False),
        'miss_sku': ('skuId', None, 'miss_sku', False),
        'miss_item_data': ('skuId', None, 'miss_item_data', False),
        'spu': ('spuId', 'spu_{}', 'spu_pool', True),
        'catalog': ('catalogId', 'cat_{}', 'catalog_pool', False),
        'supplier': ('id', 'supplier_{}', 'supplier_pool', False),
        'brand': ('id', 'brand_{}', 'brand_pool', False),
        'area': ('areaId', 'area_{}', 'area_pool', False),
        'bid_item': ('itemId', 'bid_item_{}', None, False),
    }

    # Tags whose records also flip ``offlineTime`` back to 0 in ``sku_pool``.
    _ITEM_TAGS = ('item', 'item_pool')

    def __init__(self, mongo_uri, bot_name):
        """Store connection settings; no connection is opened here.

        :param mongo_uri: URI handed to ``MongoClient`` in ``open_spider``.
        :param bot_name: prefix of the per-year database names.
        """
        self.mongo_uri = mongo_uri
        self.bot_name = bot_name
        self.client = None
        # year (as str) -> database handle
        self.db_map = dict()

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's ``MONGODB_URI`` and ``BOT_NAME`` settings."""
        settings = crawler.settings
        return cls(
            mongo_uri=settings.get('MONGODB_URI'),
            bot_name=settings.get('BOT_NAME')
        )

    def open_spider(self, spider):
        """Create the Mongo client and pre-register the current year's database."""
        self.client = MongoClient(self.mongo_uri)
        # Initialise the database of the current year by default.
        year = str(datetime.datetime.now().year)
        self.db_map[year] = self.client['{}_{}'.format(self.bot_name, year)]

    def get_db(self, batch_no=None, year=None):
        """Return the (cached) database handle for a year.

        :param batch_no: batch number; converted with ``batch_to_year`` when
            ``year`` is not given.
        :param year: year of the database; takes precedence over ``batch_no``.
        :raises Exception: when neither ``batch_no`` nor ``year`` is given.
        """
        if not batch_no and not year:
            raise Exception('批次编号与年份至少指定一个')
        if not year:
            year = batch_to_year(batch_no)
        # Cache keys are always strings so that 2019 and '2019' share one
        # entry (open_spider registers the current year as a string).
        year = str(year)
        db = self.db_map.get(year)
        # Compare with None: database handles must not be truth-tested
        # (PyMongo Database objects raise NotImplementedError on bool()).
        if db is None:
            db = self.client['{}_{}'.format(self.bot_name, year)]
            self.db_map[year] = db

        return db

    def close_spider(self, spider):
        """Close the client (if one was opened) and drop the cached handles."""
        if self.client is not None:
            self.client.close()
        # Cached handles belong to the closed client; do not keep them around.
        self.db_map.clear()

    def process_item(self, item, spider):
        """Persist a ``Box`` item according to its ``tag`` and return it.

        Items that are not a valid ``Box``, or whose tag is unknown, are
        returned untouched. Storage errors are logged on ``spider.logger``
        and never propagate, so the item still continues down the pipeline.

        :raises DropItem: when ``item`` is empty.
        """
        # Raised outside the try block: inside it the generic handler below
        # would swallow the exception and the empty item would never be dropped.
        if not item:
            raise DropItem("drop empty item： [{}]".format(self))

        try:
            if isinstance(item, Box) and item.validate():
                tag = item.get('tag')
                data_list = item.get('data')
                if tag == 'order':
                    self._save_orders(data_list)
                elif tag == 'order_item':
                    self._save_order_items(data_list)
                elif tag in self._TAG_RULES:
                    self._save_tagged(tag, item.get('batchNo'), data_list)
        except DuplicateKeyError:
            spider.logger.debug('duplicate key error collection')
        except Exception:
            spider.logger.error(format_exc())
        return item

    @staticmethod
    def _bulk_write(collection, operations):
        """Run an unordered bulk write, skipping empty operation lists."""
        # pymongo raises InvalidOperation for an empty request list.
        if operations:
            collection.bulk_write(operations, ordered=False, bypass_document_validation=True)

    @staticmethod
    def _fill_dept_id(data):
        """Derive ``deptId`` from ``orderDept`` (md5) when it is missing."""
        order_dept = data.get('orderDept')
        if order_dept and not data.get('deptId'):
            data['deptId'] = md5(order_dept)

    def _save_tagged(self, tag, batch_no, data_list):
        """Upsert batch-scoped records for every tag listed in ``_TAG_RULES``."""
        id_field, batch_tpl, fixed_name, reset_offline = self._TAG_RULES[tag]
        operations = []
        record_ids = []
        for data in data_list:
            # The batch number is stamped on the original record as well.
            data['batchNo'] = batch_no
            to_save = copy.deepcopy(data)
            if reset_offline:
                to_save['offlineTime'] = 0
            record_id = to_save.pop(id_field)
            operations.append(UpdateOne({'_id': record_id}, {'$set': to_save}, upsert=True))
            record_ids.append(record_id)

        db = self.get_db(batch_no)
        if batch_tpl:
            self._bulk_write(db[batch_tpl.format(batch_no)], operations)
        if fixed_name:
            self._bulk_write(db[fixed_name], operations)
        if tag in self._ITEM_TAGS and record_ids:
            db['sku_pool'].update_many(
                {'_id': {'$in': record_ids}}, {'$set': {'offlineTime': 0}}, upsert=False)

    def _save_orders(self, data_list):
        """Upsert orders into ``order_<year>``, grouped by the year of their batch number."""
        year_ops = dict()
        for data in data_list:
            # Fill in the purchasing department id.
            self._fill_dept_id(data)
            # Work out the batch number; orderTime is required to be local
            # time (UTC+8) per the original comment.
            batch_no = data.get('batchNo')
            if not batch_no:
                order_time = data.get('orderTime')
                if order_time:
                    batch_no = time_to_batch_no(order_time)
                    data['batchNo'] = batch_no
            # Records without any usable batch number are skipped.
            if batch_no:
                # 20190710 -> 2019
                year = str(batch_no)[:-4]
                to_save = copy.deepcopy(data)
                operation = UpdateOne({'_id': to_save.pop("id")}, {'$set': to_save}, upsert=True)
                year_ops.setdefault(year, []).append(operation)
        for year, operations in year_ops.items():
            self._bulk_write(self.get_db(year=year)['order_{}'.format(year)], operations)

    def _save_order_items(self, data_list):
        """Upsert order items into ``order_item_<yyyymm>``, grouped by order month."""
        month_ops = dict()
        for data in data_list:
            # Fill in the purchasing department id.
            self._fill_dept_id(data)
            # NOTE(review): unlike orders, orderTime is passed on even when
            # missing; confirm how time_to_batch_no handles None.
            batch_no = time_to_batch_no(data.get('orderTime'))
            if batch_no:
                # 20190710 -> 201907
                year_month = str(batch_no)[:-2]
                to_save = copy.deepcopy(data)
                operation = UpdateOne({'_id': to_save.pop("id")}, {'$set': to_save}, upsert=True)
                month_ops.setdefault(year_month, []).append(operation)
        for year_month, operations in month_ops.items():
            # 201907 -> 2019
            year = str(year_month)[:-2]
            self._bulk_write(
                self.get_db(year=year)['order_item_{}'.format(year_month)], operations)
